<!DOCTYPE html>
<html lang="en" xml:lang="en">
<head>

  <meta charset="utf-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=edge" />
  <title>Chapter 2 An Introduction to Machine Learning with R | An Introduction to Machine Learning with R</title>
  <meta name="description" content="A hands-on introduction to machine learning with R." />
  <meta name="generator" content="bookdown 0.17 and GitBook 2.6.7" />

  <meta property="og:title" content="Chapter 2 An Introduction to Machine Learning with R | An Introduction to Machine Learning with R" />
  <meta property="og:type" content="book" />
  
  
  <meta property="og:description" content="A hands-on introduction to machine learning with R." />
  <meta name="github-repo" content="lgatto/IntroMachineLearningWithR" />

  <meta name="twitter:card" content="summary" />
  <meta name="twitter:title" content="Chapter 2 An Introduction to Machine Learning with R | An Introduction to Machine Learning with R" />
  
  <meta name="twitter:description" content="A hands-on introduction to machine learning with R." />
  

<meta name="author" content="Laurent Gatto" />


<meta name="date" content="2020-02-28" />

  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="apple-mobile-web-app-capable" content="yes" />
  <meta name="apple-mobile-web-app-status-bar-style" content="black" />
  
  
<link rel="prev" href="index.html"/>
<link rel="next" href="example-datasets.html"/>
<script src="libs/jquery-2.2.3/jquery.min.js"></script>
<link href="libs/gitbook-2.6.7/css/style.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-table.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-bookdown.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-highlight.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-search.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-fontsettings.css" rel="stylesheet" />
<link href="libs/gitbook-2.6.7/css/plugin-clipboard.css" rel="stylesheet" />









<script src="libs/htmlwidgets-1.5.1/htmlwidgets.js"></script>
<link href="libs/datatables-css-0.0.0/datatables-crosstalk.css" rel="stylesheet" />
<script src="libs/datatables-binding-0.12/datatables.js"></script>
<link href="libs/dt-core-1.10.20/css/jquery.dataTables.min.css" rel="stylesheet" />
<link href="libs/dt-core-1.10.20/css/jquery.dataTables.extra.css" rel="stylesheet" />
<script src="libs/dt-core-1.10.20/js/jquery.dataTables.min.js"></script>
<link href="libs/crosstalk-1.0.0/css/crosstalk.css" rel="stylesheet" />
<script src="libs/crosstalk-1.0.0/js/crosstalk.min.js"></script>


<style type="text/css">
a.sourceLine { display: inline-block; line-height: 1.25; }
a.sourceLine { pointer-events: none; color: inherit; text-decoration: inherit; }
a.sourceLine:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode { white-space: pre; position: relative; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
code.sourceCode { white-space: pre-wrap; }
a.sourceLine { text-indent: -1em; padding-left: 1em; }
}
pre.numberSource a.sourceLine
  { position: relative; left: -4em; }
pre.numberSource a.sourceLine::before
  { content: attr(data-line-number);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; pointer-events: all; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
div.sourceCode
  {  }
@media screen {
a.sourceLine::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>

</head>

<body>



  <div class="book without-animation with-summary font-size-2 font-family-1" data-basepath=".">

    <div class="book-summary">
      <nav role="navigation">

<ul class="summary">
<li class="chapter" data-level="1" data-path="index.html"><a href="index.html"><i class="fa fa-check"></i><b>1</b> Preface</a><ul>
<li class="chapter" data-level="1.1" data-path="index.html"><a href="index.html#caution"><i class="fa fa-check"></i><b>1.1</b> Caution</a></li>
<li class="chapter" data-level="1.2" data-path="index.html"><a href="index.html#installation"><i class="fa fa-check"></i><b>1.2</b> Installation</a></li>
<li class="chapter" data-level="1.3" data-path="index.html"><a href="index.html#license"><i class="fa fa-check"></i><b>1.3</b> License</a></li>
<li class="chapter" data-level="1.4" data-path="index.html"><a href="index.html#contact"><i class="fa fa-check"></i><b>1.4</b> Contact</a></li>
</ul></li>
<li class="chapter" data-level="2" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html"><i class="fa fa-check"></i><b>2</b> An Introduction to Machine Learning with R</a><ul>
<li class="chapter" data-level="2.1" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#objectives-and-pre-requisites"><i class="fa fa-check"></i><b>2.1</b> Objectives and pre-requisites</a></li>
<li class="chapter" data-level="2.2" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#why-r"><i class="fa fa-check"></i><b>2.2</b> Why R?</a></li>
<li class="chapter" data-level="2.3" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#overview-of-machine-learning-ml"><i class="fa fa-check"></i><b>2.3</b> Overview of machine learning (ML)</a></li>
<li class="chapter" data-level="2.4" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#material-and-methods"><i class="fa fa-check"></i><b>2.4</b> Material and methods</a><ul>
<li class="chapter" data-level="2.4.1" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#example-data"><i class="fa fa-check"></i><b>2.4.1</b> Example data</a></li>
<li class="chapter" data-level="2.4.2" data-path="an-introduction-to-machine-learning-with-r.html"><a href="an-introduction-to-machine-learning-with-r.html#packages"><i class="fa fa-check"></i><b>2.4.2</b> Packages</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="3" data-path="example-datasets.html"><a href="example-datasets.html"><i class="fa fa-check"></i><b>3</b> Example datasets</a><ul>
<li class="chapter" data-level="3.1" data-path="example-datasets.html"><a href="example-datasets.html#edgar-andersons-iris-data"><i class="fa fa-check"></i><b>3.1</b> Edgar Anderson’s Iris Data</a></li>
<li class="chapter" data-level="3.2" data-path="example-datasets.html"><a href="example-datasets.html#motor-trend-car-road-tests"><i class="fa fa-check"></i><b>3.2</b> Motor Trend Car Road Tests</a></li>
<li class="chapter" data-level="3.3" data-path="example-datasets.html"><a href="example-datasets.html#sub-cellular-localisation"><i class="fa fa-check"></i><b>3.3</b> Sub-cellular localisation</a></li>
<li class="chapter" data-level="3.4" data-path="example-datasets.html"><a href="example-datasets.html#the-diamonds-data"><i class="fa fa-check"></i><b>3.4</b> The diamonds data</a></li>
<li class="chapter" data-level="3.5" data-path="example-datasets.html"><a href="example-datasets.html#the-sonar-data"><i class="fa fa-check"></i><b>3.5</b> The Sonar data</a></li>
<li class="chapter" data-level="3.6" data-path="example-datasets.html"><a href="example-datasets.html#housing-values-in-suburbs-of-boston"><i class="fa fa-check"></i><b>3.6</b> Housing Values in Suburbs of Boston</a></li>
<li class="chapter" data-level="3.7" data-path="example-datasets.html"><a href="example-datasets.html#customer-churn"><i class="fa fa-check"></i><b>3.7</b> Customer churn</a></li>
</ul></li>
<li class="chapter" data-level="4" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html"><i class="fa fa-check"></i><b>4</b> Unsupervised Learning</a><ul>
<li class="chapter" data-level="4.1" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#introduction"><i class="fa fa-check"></i><b>4.1</b> Introduction</a></li>
<li class="chapter" data-level="4.2" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#k-means-clustering"><i class="fa fa-check"></i><b>4.2</b> k-means clustering</a><ul>
<li class="chapter" data-level="4.2.1" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#how-does-k-means-work"><i class="fa fa-check"></i><b>4.2.1</b> How does k-means work</a></li>
<li class="chapter" data-level="4.2.2" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#model-selection"><i class="fa fa-check"></i><b>4.2.2</b> Model selection</a></li>
<li class="chapter" data-level="4.2.3" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#how-to-determine-the-number-of-clusters"><i class="fa fa-check"></i><b>4.2.3</b> How to determine the number of clusters</a></li>
</ul></li>
<li class="chapter" data-level="4.3" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#hierarchical-clustering"><i class="fa fa-check"></i><b>4.3</b> Hierarchical clustering</a><ul>
<li class="chapter" data-level="4.3.1" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#how-does-hierarchical-clustering-work"><i class="fa fa-check"></i><b>4.3.1</b> How does hierarchical clustering work</a></li>
<li class="chapter" data-level="4.3.2" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#defining-clusters"><i class="fa fa-check"></i><b>4.3.2</b> Defining clusters</a></li>
</ul></li>
<li class="chapter" data-level="4.4" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#pre-processing"><i class="fa fa-check"></i><b>4.4</b> Pre-processing</a></li>
<li class="chapter" data-level="4.5" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#principal-component-analysis-pca"><i class="fa fa-check"></i><b>4.5</b> Principal component analysis (PCA)</a><ul>
<li class="chapter" data-level="4.5.1" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#how-does-it-work"><i class="fa fa-check"></i><b>4.5.1</b> How does it work</a></li>
<li class="chapter" data-level="4.5.2" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#visualisation"><i class="fa fa-check"></i><b>4.5.2</b> Visualisation</a></li>
<li class="chapter" data-level="4.5.3" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#data-pre-processing"><i class="fa fa-check"></i><b>4.5.3</b> Data pre-processing</a></li>
<li class="chapter" data-level="4.5.4" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#final-comments-on-pca"><i class="fa fa-check"></i><b>4.5.4</b> Final comments on PCA</a></li>
</ul></li>
<li class="chapter" data-level="4.6" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#t-distributed-stochastic-neighbour-embedding"><i class="fa fa-check"></i><b>4.6</b> t-Distributed Stochastic Neighbour Embedding</a><ul>
<li class="chapter" data-level="4.6.1" data-path="unsupervised-learning.html"><a href="unsupervised-learning.html#parameter-tuning"><i class="fa fa-check"></i><b>4.6.1</b> Parameter tuning</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="5" data-path="supervised-learning.html"><a href="supervised-learning.html"><i class="fa fa-check"></i><b>5</b> Supervised Learning</a><ul>
<li class="chapter" data-level="5.1" data-path="supervised-learning.html"><a href="supervised-learning.html#introduction-1"><i class="fa fa-check"></i><b>5.1</b> Introduction</a></li>
<li class="chapter" data-level="5.2" data-path="supervised-learning.html"><a href="supervised-learning.html#preview"><i class="fa fa-check"></i><b>5.2</b> Preview</a></li>
<li class="chapter" data-level="5.3" data-path="supervised-learning.html"><a href="supervised-learning.html#model-performance"><i class="fa fa-check"></i><b>5.3</b> Model performance</a><ul>
<li class="chapter" data-level="5.3.1" data-path="supervised-learning.html"><a href="supervised-learning.html#in-sample-and-out-of-sample-error"><i class="fa fa-check"></i><b>5.3.1</b> In-sample and out-of-sample error</a></li>
<li class="chapter" data-level="5.3.2" data-path="supervised-learning.html"><a href="supervised-learning.html#cross-validation"><i class="fa fa-check"></i><b>5.3.2</b> Cross-validation</a></li>
</ul></li>
<li class="chapter" data-level="5.4" data-path="supervised-learning.html"><a href="supervised-learning.html#classification-performance"><i class="fa fa-check"></i><b>5.4</b> Classification performance</a><ul>
<li class="chapter" data-level="5.4.1" data-path="supervised-learning.html"><a href="supervised-learning.html#confusion-matrix"><i class="fa fa-check"></i><b>5.4.1</b> Confusion matrix</a></li>
<li class="chapter" data-level="5.4.2" data-path="supervised-learning.html"><a href="supervised-learning.html#receiver-operating-characteristic-roc-curve"><i class="fa fa-check"></i><b>5.4.2</b> Receiver operating characteristic (ROC) curve</a></li>
<li class="chapter" data-level="5.4.3" data-path="supervised-learning.html"><a href="supervised-learning.html#auc-in-caret"><i class="fa fa-check"></i><b>5.4.3</b> AUC in <code>caret</code></a></li>
</ul></li>
<li class="chapter" data-level="5.5" data-path="supervised-learning.html"><a href="supervised-learning.html#random-forest"><i class="fa fa-check"></i><b>5.5</b> Random forest</a><ul>
<li class="chapter" data-level="5.5.1" data-path="supervised-learning.html"><a href="supervised-learning.html#decision-trees"><i class="fa fa-check"></i><b>5.5.1</b> Decision trees</a></li>
<li class="chapter" data-level="5.5.2" data-path="supervised-learning.html"><a href="supervised-learning.html#training-a-random-forest"><i class="fa fa-check"></i><b>5.5.2</b> Training a random forest</a></li>
</ul></li>
<li class="chapter" data-level="5.6" data-path="supervised-learning.html"><a href="supervised-learning.html#data-pre-processing-1"><i class="fa fa-check"></i><b>5.6</b> Data pre-processing</a><ul>
<li class="chapter" data-level="5.6.1" data-path="supervised-learning.html"><a href="supervised-learning.html#missing-values"><i class="fa fa-check"></i><b>5.6.1</b> Missing values</a></li>
<li class="chapter" data-level="5.6.2" data-path="supervised-learning.html"><a href="supervised-learning.html#median-imputation"><i class="fa fa-check"></i><b>5.6.2</b> Median imputation</a></li>
<li class="chapter" data-level="5.6.3" data-path="supervised-learning.html"><a href="supervised-learning.html#knn-imputation"><i class="fa fa-check"></i><b>5.6.3</b> kNN imputation</a></li>
</ul></li>
<li class="chapter" data-level="5.7" data-path="supervised-learning.html"><a href="supervised-learning.html#scaling-and-centering"><i class="fa fa-check"></i><b>5.7</b> Scaling and centering</a><ul>
<li class="chapter" data-level="5.7.1" data-path="supervised-learning.html"><a href="supervised-learning.html#multiple-pre-processing-methods"><i class="fa fa-check"></i><b>5.7.1</b> Multiple pre-processing methods</a></li>
</ul></li>
<li class="chapter" data-level="5.8" data-path="supervised-learning.html"><a href="supervised-learning.html#model-selection-1"><i class="fa fa-check"></i><b>5.8</b> Model selection</a><ul>
<li class="chapter" data-level="5.8.1" data-path="supervised-learning.html"><a href="supervised-learning.html#glmnet-model"><i class="fa fa-check"></i><b>5.8.1</b> <code>glmnet</code> model</a></li>
<li class="chapter" data-level="5.8.2" data-path="supervised-learning.html"><a href="supervised-learning.html#random-forest-model"><i class="fa fa-check"></i><b>5.8.2</b> random forest model</a></li>
<li class="chapter" data-level="5.8.3" data-path="supervised-learning.html"><a href="supervised-learning.html#knn-model"><i class="fa fa-check"></i><b>5.8.3</b> kNN model</a></li>
<li class="chapter" data-level="5.8.4" data-path="supervised-learning.html"><a href="supervised-learning.html#support-vector-machine-model"><i class="fa fa-check"></i><b>5.8.4</b> Support vector machine model</a></li>
<li class="chapter" data-level="5.8.5" data-path="supervised-learning.html"><a href="supervised-learning.html#naive-bayes"><i class="fa fa-check"></i><b>5.8.5</b> Naive Bayes</a></li>
<li class="chapter" data-level="5.8.6" data-path="supervised-learning.html"><a href="supervised-learning.html#comparing-models"><i class="fa fa-check"></i><b>5.8.6</b> Comparing models</a></li>
<li class="chapter" data-level="5.8.7" data-path="supervised-learning.html"><a href="supervised-learning.html#pre-processing-1"><i class="fa fa-check"></i><b>5.8.7</b> Pre-processing</a></li>
<li class="chapter" data-level="5.8.8" data-path="supervised-learning.html"><a href="supervised-learning.html#predict-using-the-best-model"><i class="fa fa-check"></i><b>5.8.8</b> Predict using the best model</a></li>
</ul></li>
</ul></li>
<li class="chapter" data-level="6" data-path="final-notes.html"><a href="final-notes.html"><i class="fa fa-check"></i><b>6</b> Final notes</a><ul>
<li class="chapter" data-level="6.1" data-path="final-notes.html"><a href="final-notes.html#other-learning-algorithms"><i class="fa fa-check"></i><b>6.1</b> Other learning algorithms</a><ul>
<li class="chapter" data-level="" data-path="final-notes.html"><a href="final-notes.html#semi-supervised-learning"><i class="fa fa-check"></i>Semi-supervised learning</a></li>
<li class="chapter" data-level="" data-path="final-notes.html"><a href="final-notes.html#deep-learning-in-r"><i class="fa fa-check"></i>Deep learning in R</a></li>
</ul></li>
<li class="chapter" data-level="6.2" data-path="final-notes.html"><a href="final-notes.html#model-performance-1"><i class="fa fa-check"></i><b>6.2</b> Model performance</a></li>
<li class="chapter" data-level="6.3" data-path="final-notes.html"><a href="final-notes.html#credit-and-acknowledgements"><i class="fa fa-check"></i><b>6.3</b> Credit and acknowledgements</a></li>
<li class="chapter" data-level="6.4" data-path="final-notes.html"><a href="final-notes.html#references-and-further-reading"><i class="fa fa-check"></i><b>6.4</b> References and further reading</a></li>
<li class="chapter" data-level="6.5" data-path="final-notes.html"><a href="final-notes.html#session-information"><i class="fa fa-check"></i><b>6.5</b> Session information</a></li>
</ul></li>
</ul>

      </nav>
    </div>

    <div class="book-body">
      <div class="body-inner">
        <div class="book-header" role="navigation">
          <h1>
            <i class="fa fa-circle-o-notch fa-spin"></i><a href="./">An Introduction to Machine Learning with R</a>
          </h1>
        </div>

        <div class="page-wrapper" tabindex="-1" role="main">
          <div class="page-inner">

            <section class="normal" id="section-">
<div id="an-introduction-to-machine-learning-with-r" class="section level1">
<h1><span class="header-section-number">Chapter 2</span> An Introduction to Machine Learning with R</h1>
<p>This introductory workshop on machine learning with R is aimed at
participants who are not experts in machine learning (introductory
material will be presented as part of the course), but have some
familiarity with scripting in general and R in particular. The
workshop will offer a hands-on overview of typical machine learning
applications in R, including unsupervised (clustering, such as
hierarchical and k-means clustering, and dimensionality reduction,
such as principal component analysis) and supervised methods (classification
and regression, such as k-nearest neighbour and linear regression). We will also address questions such as model selection using
cross-validation. The material has an important hands-on component and
readers should have a computer running R 3.4.1 or later.</p>
<div id="objectives-and-pre-requisites" class="section level2">
<h2><span class="header-section-number">2.1</span> Objectives and pre-requisites</h2>
<ul>
<li><p>The course aims at providing an accessible introduction to various
machine learning methods and applications in R. The core of the
course focuses on unsupervised and supervised methods.</p></li>
<li><p>The course contains numerous exercises that provide ample
opportunities to apply the newly acquired material.</p></li>
<li><p>Participants are expected to be familiar with the R syntax and basic
plotting functionality.</p></li>
<li><p>At the end of the course, the participants are anticipated to be
able to apply what they have learnt, as well as feel confident
enough to explore and apply new methods.</p></li>
</ul>
</div>
<div id="why-r" class="section level2">
<h2><span class="header-section-number">2.2</span> Why R?</h2>
<p>R is one of the major languages for data science. It provides
excellent visualisation features, which are essential to explore the
data before submitting it to any automated learning, as well as to
assess the results of the learning algorithm. Many R packages
for <a href="https://cran.r-project.org/">machine learning</a> are available off
the shelf and many modern methods in statistical learning are
implemented in R as part of their development.</p>
<p>There are however other viable alternatives that benefit from similar
advantages. If we consider Python for example,
the <a href="http://scikit-learn.org/stable/index.html">scikit-learn</a> library
provides all the tools that we will discuss in this course.</p>
</div>
<div id="overview-of-machine-learning-ml" class="section level2">
<h2><span class="header-section-number">2.3</span> Overview of machine learning (ML)</h2>
<p>In <strong>supervised learning</strong> (SML), the learning algorithm is presented
with labelled example inputs, where the labels indicate the desired
output. SML itself is composed of <strong>classification</strong>, where the output
is categorical, and <strong>regression</strong>, where the output is numerical.</p>
<p>In <strong>unsupervised learning</strong> (UML), no labels are provided, and the
learning algorithm focuses solely on detecting structure in unlabelled
input data.</p>
<p>Note that there are also <strong>semi-supervised learning</strong> approaches that
use labelled data to inform unsupervised learning on the unlabelled
data to identify and annotate new classes in the dataset (also called
novelty detection).</p>
<p>In <strong>reinforcement learning</strong>, the learning algorithm performs a task
using feedback from operating in a real or synthetic environment.</p>
</div>
<div id="material-and-methods" class="section level2">
<h2><span class="header-section-number">2.4</span> Material and methods</h2>
<div id="example-data" class="section level3">
<h3><span class="header-section-number">2.4.1</span> Example data</h3>
<ul>
<li><em>Observations</em>, <em>examples</em> or simply <em>data points</em> along the rows</li>
<li><em>Features</em> or <em>variables</em> along the columns</li>
</ul>
<p>Using the <em>iris</em> data as an example, for UML, we would have 4 features
for each unlabelled example.</p>
<table>
<thead>
<tr class="header">
<th align="right">Sepal.Length</th>
<th align="right">Sepal.Width</th>
<th align="right">Petal.Length</th>
<th align="right">Petal.Width</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="right">5.1</td>
<td align="right">3.5</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="right">4.9</td>
<td align="right">3.0</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="odd">
<td align="right">4.7</td>
<td align="right">3.2</td>
<td align="right">1.3</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="right">4.6</td>
<td align="right">3.1</td>
<td align="right">1.5</td>
<td align="right">0.2</td>
</tr>
<tr class="odd">
<td align="right">5.0</td>
<td align="right">3.6</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="right">5.4</td>
<td align="right">3.9</td>
<td align="right">1.7</td>
<td align="right">0.4</td>
</tr>
</tbody>
</table>
<p>The same dataset used in the context of SML contains an additional
column of labels, documenting the outcome or class of each example.</p>
<table>
<thead>
<tr class="header">
<th align="left">Species</th>
<th align="right">Sepal.Length</th>
<th align="right">Sepal.Width</th>
<th align="right">Petal.Length</th>
<th align="right">Petal.Width</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left">setosa</td>
<td align="right">5.1</td>
<td align="right">3.5</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="left">setosa</td>
<td align="right">4.9</td>
<td align="right">3.0</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="odd">
<td align="left">setosa</td>
<td align="right">4.7</td>
<td align="right">3.2</td>
<td align="right">1.3</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="left">setosa</td>
<td align="right">4.6</td>
<td align="right">3.1</td>
<td align="right">1.5</td>
<td align="right">0.2</td>
</tr>
<tr class="odd">
<td align="left">setosa</td>
<td align="right">5.0</td>
<td align="right">3.6</td>
<td align="right">1.4</td>
<td align="right">0.2</td>
</tr>
<tr class="even">
<td align="left">setosa</td>
<td align="right">5.4</td>
<td align="right">3.9</td>
<td align="right">1.7</td>
<td align="right">0.4</td>
</tr>
</tbody>
</table>
<p>The different datasets that are used throughout the course are
collected and briefly described in the short <em>Data</em> chapter.</p>
</div>
<div id="packages" class="section level3">
<h3><span class="header-section-number">2.4.2</span> Packages</h3>
<p>We will be using, directly or indirectly, the following packages
throughout the chapters:</p>
<ul>
<li><em><a href="https://CRAN.R-project.org/package=caret">caret</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=ggplot2">ggplot2</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=mlbench">mlbench</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=class">class</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=caTools">caTools</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=randomForest">randomForest</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=impute">impute</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=ranger">ranger</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=kernlab">kernlab</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=class">class</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=glmnet">glmnet</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=naivebayes">naivebayes</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=rpart">rpart</a></em></li>
<li><em><a href="https://CRAN.R-project.org/package=rpart.plot">rpart.plot</a></em></li>
</ul>
<p>See the full session information for more details.</p>
<p>A more comprehensive list of machine learning libraries in R can be found at the <a href="https://cran.r-project.org/web/views/MachineLearning.html">CRAN Task View for Machine Learning and Statistical Learning</a>.</p>

</div>
</div>
</div>
            </section>

          </div>
        </div>
      </div>
<a href="index.html" class="navigation navigation-prev " aria-label="Previous page"><i class="fa fa-angle-left"></i></a>
<a href="example-datasets.html" class="navigation navigation-next " aria-label="Next page"><i class="fa fa-angle-right"></i></a>
    </div>
  </div>
<script src="libs/gitbook-2.6.7/js/app.min.js"></script>
<script src="libs/gitbook-2.6.7/js/lunr.js"></script>
<script src="libs/gitbook-2.6.7/js/clipboard.min.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-search.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-sharing.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-fontsettings.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-bookdown.js"></script>
<script src="libs/gitbook-2.6.7/js/jquery.highlight.js"></script>
<script src="libs/gitbook-2.6.7/js/plugin-clipboard.js"></script>
<script>
gitbook.require(["gitbook"], function(gitbook) {
gitbook.start({
"sharing": {
"github": false,
"facebook": true,
"twitter": true,
"linkedin": false,
"weibo": false,
"instapaper": false,
"vk": false,
"all": ["facebook", "twitter", "linkedin", "weibo", "instapaper"]
},
"fontsettings": {
"theme": "white",
"family": "sans",
"size": 2
},
"edit": {
"link": null,
"text": null
},
"history": {
"link": null,
"text": null
},
"view": {
"link": null,
"text": null
},
"download": null,
"toc": {
"collapse": "subsection"
}
});
});
</script>

<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    var src = "true";
    if (src === "" || src === "true") src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML";
    if (location.protocol !== "file:")
      if (/^https?:/.test(src))
        src = src.replace(/^https?:/, '');
    script.src = src;
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
</body>

</html>
